import re
import urllib.request
import time
time.sleep(3)
import pandas
import emoji

# Browser-like User-Agent header sent with every HTTP request below.
headers = {'User-Agent':
           'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}

# Seconds to wait on a single HTTP request before giving up, so one stalled
# connection cannot hang the whole crawl.
_REQUEST_TIMEOUT = 30

# Compiled once: both patterns are reused for every page of the crawl.
_LINK_ID_RE = re.compile(r'"link_id":(\d+)')
# NOTE(review): the lazy match stops at the first double quote, so a comment
# containing an escaped quote would be truncated -- confirm against the real
# response format before relying on complete comment text.
_COMMENT_RE = re.compile('comment_content":"(.*?)"')


def _fetch_text(target_url):
    """Download ``target_url`` and return its body decoded as UTF-8.

    The response is closed as soon as the body has been read.

    Raises:
        OSError: if the request fails or times out (``urllib.error.URLError``
            is an ``OSError`` subclass).
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    request = urllib.request.Request(url=target_url, headers=headers)
    with urllib.request.urlopen(request, timeout=_REQUEST_TIMEOUT) as response:
        return response.read().decode('utf-8')


def _collect_comments(newsurl):
    """Return all comments found for the article at ``newsurl`` as one list.

    The article page is scanned for ``"link_id":<digits>`` occurrences; each
    distinct id is turned into a comments URL, and the comments found on those
    pages are concatenated in the order the ids first appear. An article page
    without any id yields an empty list.
    """
    page = _fetch_text(newsurl)
    # dict.fromkeys drops repeated ids while keeping first-seen order, so the
    # same comments page is never downloaded (and recorded) twice.
    comment_ids = dict.fromkeys(_LINK_ID_RE.findall(page))
    comments = []
    for comment_id in comment_ids:
        comment_url = 'http://www.infzm.com/contents/' + comment_id + '/comments'
        comments.extend(_COMMENT_RE.findall(_fetch_text(comment_url)))
    return comments


data = pandas.read_csv('D://nanfang/南方周末.csv', usecols=['url', 'title'])
url = data['url']
print(list(url))
newscomment = []

for newsurl in url:
    try:
        comments = _collect_comments(newsurl)
    except (OSError, UnicodeDecodeError) as err:
        # Keep crawling: one unreachable or undecodable page should not
        # discard everything collected so far.
        print('failed to fetch comments for', newsurl, '-', err)
        comments = []
    # Exactly one entry per article URL, so the 'comment' column always has
    # the same length as the 'url' column when the DataFrame is built.
    newscomment.append(comments)
    print(len(newscomment))

news = {'url': url, 'comment': newscomment}
all_data = pandas.DataFrame(news)
all_data.to_csv('D:/nanfang/comment.csv', encoding='utf-8-sig')
print(all_data)


